home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Magnum One
/
Magnum One (Mid-American Digital) (Disc Manufacturing).iso
/
d12
/
ddj1190.arc
/
E_FLOYD.ASC
< prev
next >
Wrap
Text File
|
1990-10-27
|
18KB
|
480 lines
_AN EXISTENTIAL DICTIONARY_
by Edwin T. Floyd
[LISTING ONE]
{$A+,B-,D-,E-,F+,I+,L-,N-,O-,R-,S-,V+}
Unit Dict;
Interface
{ DICT.PAS dictionary object and methods to create and use a superimposed
code dictionary. Copyright Edwin T. Floyd, 1990. }
Type
Dictionary = Object
DictArray : Pointer; { Pointer to dictionary bit array }
DictCount : LongInt; { Number of key entries in this dictionary }
DictSize : Word; { Number of bytes in dictionary bit array }
DictBits : Byte; { Number of bits per key entry }
Constructor Init(MaxKeys : Word; BitsPerKey : Byte);
{ Initialize dictionary, specify maximum keys and bits per key. }
Constructor RestoreDictionary(FileName : String);
{ Restore dictionary saved on disk by SaveDictionary }
{ Note: Use either Init or RestoreDictionary, not both. }
Destructor Done;
{ Release storage allocated to dictionary. }
Function DictionarySize : Word;
{ Returns number of bytes that will be written by SaveDictionary. }
Procedure SaveDictionary(FileName : String);
{ Save dictionary in a disk file. }
Function InsertString(Var s : String) : Boolean;
{ Insert string in dictionary; returns TRUE if string is already there. }
Function StringInDictionary(Var s : String) : Boolean;
{ Returns TRUE if string is in dictionary. }
Function InsertBlock(Var Data; Len : Word) : Boolean;
{ Insert block in dictionary; returns TRUE if block is already there. }
Function BlockInDictionary(Var Data; Len : Word) : Boolean;
{ Returns TRUE if block is in dictionary. }
Function InsertHash(Hash : LongInt) : Boolean;
{ Insert hash in dictionary; returns TRUE if hash is already there. }
Function HashInDictionary(Hash : LongInt) : Boolean;
{ Returns TRUE if hash is in dictionary. }
Function EstError : Real;
{ Returns estimated probability of error. }
Function ActError : Real;
{ Returns actual probability of error (slow, counts bits). }
End;
Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt;
{ Returns the size in bytes of the optimal dictionary bit table for the
indicated key and bit-per-key counts. }
Function DictHash(Var Data; Len : Word) : LongInt;
{ Hash data block to a positive long integer. }
Implementation
Const
MagicNumber = $E501205F; { Used to validate dictionary save file }
RandMult = 16807; { =7**5; RandMult must be expressable in 16 bits.
48271 may give better "randomness" (see ACM ref.) }
ShuffleBits = 3;
ShuffleShift = 16 - ShuffleBits;
ShufTableEnd = $FFFF Shr ShuffleShift;
HashSeed : Word = 26; { Initial hash seed }
RandSeed : LongInt = 1; { Random number seed: 0 < RandSeed < 2**31-1 }
Type
SaveFileHeader = Record
{ Header for dictionary save file (all numbers are byte-reversed) }
Magic : LongInt; { Magic number for validity test }
BitsCount : LongInt; { Bits-per-key and entry count }
Size : Word; { Size of dictionary bit map in bytes }
End;
Var
ShufTable : Array[0..ShufTableEnd] Of LongInt;
NextOut : Word;
Function IRand : LongInt;
{ Return next "minimal standard", 31 bit pseudo-random integer. This function
actually computes (RandSeed * RandMult) Mod (2**31-1) where RandMult is
a 16 bit quantity and RandSeed is 32 bits (See Carta, CACM 1/90). }
Inline(
$A1/>RandSeed+2/ { mov ax,[>RandSeed+2]}
$BF/>RandMult/ { mov di,>RandMult}
$F7/$E7/ { mul di}
$89/$C3/ { mov bx,ax}
$89/$D1/ { mov cx,dx}
$A1/>RandSeed/ { mov ax,[>RandSeed]}
$F7/$E7/ { mul di}
$01/$DA/ { add dx,bx}
$83/$D1/$00/ { adc cx,0 ; cx:dx:ax = Seed * Mult }
$D0/$E6/ { shl dh,1 ; split p & q at 31 bits }
$D1/$D1/ { rcl cx,1}
$D0/$EE/ { shr dh,1 ; cx = p, dx:ax = q }
$01/$C8/ { add ax,cx}
$83/$D2/$00/ { adc dx,0 ; dx:ax = p + q }
$71/$09/ { jno done}
$05/$01/$00/ { add ax,1 ; overflow, inc(p + q) }
$83/$D2/$00/ { adc dx,0}
$80/$E6/$7F/ { and dh,$7F ; limit to 31 bits }
{done:}
$A3/>RandSeed/ { mov [>RandSeed],ax}
$89/$16/>RandSeed+2); { mov [>RandSeed+2],dx}
Function Hash(Seed : LongInt; Var Data; Len : Word) : LongInt;
{ Hash a block of data into a random long integer. This is actually
equivalent to the following:
RandSeed := Seed;
Hash := 0;
For i := 1 To Len Do Hash := Hash + (IRand * (Data[i] + $FF00);
Hash := Hash AND $7FFFFFFF;
If Hash = 0 Then Inc(Hash);
Overflow is ignored. The seed is kept in registers; RandSeed is not
affected by this routine. }
Inline(
$59/ { pop cx ; cx := len}
$5E/ { pop si ; bx:si := @data}
$5B/ { pop bx}
$58/ { pop ax ; dx:ax := seed}
$5A/ { pop dx}
$E3/$59/ { jcxz alldone}
$FC/ { cld}
$1E/ { push ds}
$8E/$DB/ { mov ds,bx}
$55/ { push bp}
$31/$DB/ { xor bx,bx}
$53/ { push bx ; zero accumulator}
$53/ { push bx}
$89/$E5/ { mov bp,sp}
{next: ; for each byte of data...}
$51/ { push cx}
$BF/>RandMult/ { mov di,>RandMult}
$89/$C3/ { mov bx,ax}
$89/$D0/ { mov ax,dx ; compute next seed}
$F7/$E7/ { mul di}
$93/ { xchg ax,bx}
$89/$D1/ { mov cx,dx}
$F7/$E7/ { mul di}
$01/$DA/ { add dx,bx}
$83/$D1/$00/ { adc cx,0 ; cx:dx:ax = Seed * Mult}
$D0/$E6/ { shl dh,1 ; split p & q at 31 bits}
$D1/$D1/ { rcl cx,1}
$D0/$EE/ { shr dh,1 ; cx = p, dx:ax = q}
$01/$C8/ { add ax,cx}
$83/$D2/$00/ { adc dx,0 ; dx:ax = p + q}
$71/$09/ { jno noovfl}
$05/$01/$00/ { add ax,1 ; overflow, inc(p + q)}
$83/$D2/$00/ { adc dx,0}
$80/$E6/$7F/ { and dh,$7F ; limit to 31 bits}
{noovfl:}
$89/$C3/ { mov bx,ax ; save seed}
$89/$D1/ { mov cx,dx}
$AC/ { lodsb ; get next byte + $FF00}
$B4/$FF/ { mov ah,$FF}
$89/$C7/ { mov di,ax}
$F7/$E1/ { mul cx ; multiply by seed}
$97/ { xchg ax,di}
$F7/$E3/ { mul bx}
$01/$FA/ { add dx,di}
$01/$46/$00/ { add [bp+0],ax ; accumulate}
$11/$56/$02/ { adc [bp+2],dx}
$89/$D8/ { mov ax,bx}
$89/$CA/ { mov dx,cx}
$59/ { pop cx}
$E2/$B9/ { loop next ; until out of data}
{;}
$58/ { pop ax}
$5A/ { pop dx}
$5D/ { pop bp}
$1F/ { pop ds}
$80/$E6/$7F/ { and dh,$7F}
{alldone:}
$89/$C3/ { mov bx,ax}
$09/$D3/ { or bx,dx}
$75/$01/ { jnz exit}
$40); { inc ax}
{exit:}
Procedure Shuffle;
{ Load the shuffle table }
Begin
For NextOut := 0 To ShufTableEnd Do ShufTable[NextOut] := IRand;
NextOut := Word(IRand) Shr ShuffleShift;
End;
Function SIRand : LongInt;
{ Return the next shuffled random number }
Var
y : LongInt;
Begin
y := ShufTable[NextOut];
ShufTable[NextOut] := IRand;
NextOut := Word(y) Shr ShuffleShift;
SIRand := y;
End;
Function TestBit(Var BitArray; Size : Word; BitNo : LongInt) : Boolean;
{ Returns TRUE if indicated bit number, modulo size of bit array, is set.
Size is in bytes. }
Inline(
{; dx:ax := BitNo}
$58/ { pop ax}
$5A/ { pop dx}
{; bl := bit mask}
$88/$C1/ { mov cl,al}
$80/$E1/$07/ { and cl,$07}
$B3/$80/ { mov bl,$80}
$D2/$EB/ { shr bl,cl}
{; dx:ax := byte offset}
$D1/$EA/ { shr dx,1}
$D1/$D8/ { rcr ax,1}
$D1/$EA/ { shr dx,1}
$D1/$D8/ { rcr ax,1}
$D1/$EA/ { shr dx,1}
$D1/$D8/ { rcr ax,1}
{; dx := byte offset}
$5F/ { pop di}
$39/$D7/ { cmp di,dx}
$77/$0E/ { ja quickdiv}
{; protect against overflow}
$89/$F9/ { mov cx,di}
{protloop:}
$D1/$E1/ { shl cx,1}
$39/$D1/ { cmp cx,dx}
$76/$FA/ { jbe protloop}
$F7/$F1/ { div cx}
$89/$D0/ { mov ax,dx}
$31/$D2/ { xor dx,dx}
{quickdiv:}
$F7/$F7/ { div di}
{; es:di := seg:ofs of byte}
$5F/ { pop di}
$01/$D7/ { add di,dx}
$07/ { pop es}
{; test bit}
$30/$C0/ { xor al,al}
$26/$22/$1D/ { es:and bl,[di]}
$74/$02/ { jz notset}
$FE/$C0); { inc al}
{notset:}
Function SetBit(Var BitArray; Size : Word; BitNo : LongInt) : Boolean;
{ Sets the indicated bit number modulo size of bit array. Returns TRUE if
bit was already set. Size is in bytes. }
Inline(
{; dx:ax := BitNo}
$58/ { pop ax}
$5A/ { pop dx}
{; bl := bit mask}
$88/$C1/ { mov cl,al}
$80/$E1/$07/ { and cl,$07}
$B3/$80/ { mov bl,$80}
$D2/$EB/ { shr bl,cl}
{; dx:ax := byte offset}
$D1/$EA/ { shr dx,1}
$D1/$D8/ { rcr ax,1}
$D1/$EA/ { shr dx,1}
$D1/$D8/ { rcr ax,1}
$D1/$EA/ { shr dx,1}
$D1/$D8/ { rcr ax,1}
{; dx := byte offset mod size }
$5F/ { pop di}
$39/$D7/ { cmp di,dx}
$77/$0E/ { ja quickdiv}
{; protect against overflow}
$89/$F9/ { mov cx,di}
{protloop:}
$D1/$E1/ { shl cx,1}
$39/$D1/ { cmp cx,dx}
$76/$FA/ { jbe protloop}
$F7/$F1/ { div cx}
$89/$D0/ { mov ax,dx}
$31/$D2/ { xor dx,dx}
{quickdiv:}
$F7/$F7/ { div di}
{; es:di := seg:ofs of byte}
$5F/ { pop di}
$01/$D7/ { add di,dx}
$07/ { pop es}
{; test bit}
$30/$C0/ { xor al,al}
$88/$DC/ { mov ah,bl}
$26/$22/$25/ { es:and ah,[di]}
$74/$04/ { jz notset}
$FE/$C0/ { inc al}
$EB/$03/ { jmp short set}
{notset:}
$26/$08/$1D); { es:or [di],bl}
{set:}
Function LongSwap(n : LongInt) : LongInt;
{ Reverse bytes in a LongInt. }
Inline(
$5A/ { pop dx}
$58/ { pop ax}
$86/$C4/ { xchg ah,al}
$86/$D6); { xchg dh,dl}
Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt;
Begin
DictionaryBytes := Round(MaxKeys * BitsPerKey / (-Ln(0.5) * 8));
End;
Function DictHash(Var Data; Len : Word) : LongInt;
Begin
DictHash := Hash(Hash(HashSeed, Data, Len), Data, Len);
End;
Constructor Dictionary.Init(MaxKeys : Word; BitsPerKey : Byte);
Var
DictBytes : LongInt;
Begin
DictBytes := DictionaryBytes(MaxKeys, BitsPerKey);
If DictBytes > $FFF0 Then Begin
WriteLn(DictBytes, ' bytes optimal for dictionary, but ', $FFF0,
' is maximum size dictionary. Using max size.');
DictBytes := $FFF0;
End Else If DictBytes > MaxAvail Then Begin
WriteLn(DictBytes, ' bytes optimal for dictionary, but only ', MaxAvail,
' bytes are available. Using ', MaxAvail);
DictBytes := MaxAvail;
End Else If DictBytes < 16 Then DictBytes := 16;
DictSize := DictBytes;
GetMem(DictArray, DictSize);
FillChar(DictArray^, DictSize, 0);
DictCount := 0;
DictBits := BitsPerKey;
End;
Constructor Dictionary.RestoreDictionary(FileName : String);
Var
Header : SaveFileHeader;
DictBytes : LongInt;
f : File;
OldMode : Byte;
Begin
OldMode := FileMode;
FileMode := $40;
Assign(f, FileName);
Reset(f, 1);
BlockRead(f, Header, SizeOf(Header));
With Header Do Begin
Magic := LongSwap(Magic);
Size := Swap(Size);
DictBytes := FileSize(f) - SizeOf(Header);
If (Magic <> MagicNumber) Or (Size <> DictBytes) Or (Size < 16)
Or (Size > $FFF0) Then Begin
WriteLn('File ', FileName, ' is not a dictionary save file.');
Halt(1);
End;
DictSize := Size;
DictBits := BitsCount And $FF;
DictCount := LongSwap(BitsCount And $FFFFFF00);
GetMem(DictArray, DictSize);
BlockRead(f, DictArray^, DictSize);
Close(f);
FileMode := OldMode;
End;
End;
Destructor Dictionary.Done;
Begin
FreeMem(DictArray, DictSize);
DictArray := Nil;
DictSize := 0;
DictBits := 0;
DictCount := 0;
End;
Function Dictionary.DictionarySize : Word;
Begin
DictionarySize := DictSize + SizeOf(SaveFileHeader);
End;
Function Dictionary.InsertString(Var s : String) : Boolean;
Begin
InsertString := InsertBlock(s[1], Length(s));
End;
Function Dictionary.StringInDictionary(Var s : String) : Boolean;
Begin
StringInDictionary := BlockInDictionary(s[1], Length(s));
End;
Function Dictionary.InsertBlock(Var Data; Len : Word) : Boolean;
Begin
InsertBlock := InsertHash(DictHash(Data, Len));
End;
Function Dictionary.BlockInDictionary(Var Data; Len : Word) : Boolean;
Begin
BlockInDictionary := HashInDictionary(DictHash(Data, Len));
End;
Function Dictionary.InsertHash(Hash : LongInt) : Boolean;
Var
i : Byte;
InDict : Boolean;
Begin
InDict := True;
RandSeed := Hash;
Shuffle;
For i := 1 To DictBits Do
If Not SetBit(DictArray^, DictSize, SIRand) Then InDict := False;
If Not InDict Then Inc(DictCount);
InsertHash := InDict;
End;
Function Dictionary.HashInDictionary(Hash : LongInt) : Boolean;
Var
i : Byte;
InDict : Boolean;
Begin
InDict := True;
RandSeed := Hash;
Shuffle;
i := 0;
While (i < DictBits) And InDict Do Begin
If Not TestBit(DictArray^, DictSize, SIRand) Then InDict := False;
Inc(i);
End;
HashInDictionary := InDict;
End;
Procedure Dictionary.SaveDictionary(FileName : String);
Var
Header : SaveFileHeader;
f : File;
Begin
Assign(f, FileName);
ReWrite(f, 1);
With Header Do Begin
Magic := LongSwap(MagicNumber);
Size := Swap(DictSize);
BitsCount := LongSwap(DictCount) + DictBits;
End;
BlockWrite(f, Header, SizeOf(Header));
BlockWrite(f, DictArray^, DictSize);
Close(f);
End;
Function Dictionary.EstError : Real;
Begin
EstError := Exp(Ln(1.0-Exp(-(DictCount*DictBits)/(DictSize*8.0)))*DictBits);
End;
Function Dictionary.ActError : Real;
Var
AllBits, BitsOn, i : LongInt;
Begin
AllBits := LongInt(DictSize) * 8;
BitsOn := 0;
For i := 0 To Pred(AllBits) Do
If TestBit(DictArray^, DictSize, i) Then Inc(BitsOn);
ActError := Exp(Ln(BitsOn / AllBits) * DictBits);
End;
End.